# The reads further down build their paths from `raw_data` and `work_data`,
# which this script never defines, so they must already exist when it starts.
# Both are pasted directly in front of "grunddata/...", so each needs a
# trailing path separator.
# The workspace is deliberately NOT cleared here: rm(list = ls()) would delete
# those two prefixes and every path built from them would fail.
if (!exists("raw_data") || !exists("work_data")) {
  stop("Define `raw_data` and `work_data` before running this script.",
       call. = FALSE)
}
library(dplyr)
library(fst)
library(haven)


# Yearly input panel; the code below reads its PNR, year and res_income
# columns. The file is looked up relative to the working directory.
data_comp <- read.fst(path = "data_comp_yearly.fst")


# Build the local-election panel --------------------------------------------
# One pass per election year in `years`. Each pass yields one row per PNR
# found in data_comp during the four calendar years ending in that election
# year, with candidacy information and a population-file flag attached.
years <- seq(1993, 2013, by = 4)

# Per-election results go into a preallocated list that is bound once after
# the loop, rather than re-copying a growing tibble on every iteration.
local_chunks <- vector("list", length(years))

for (k in seq_along(years)) {
  year_k <- years[k]

  # Candidate file for this election: keep the person id, the outcome, the
  # election type and the party. VALGT_JN becomes TRUE when it equals "J" and
  # FALSE for any other value.
  candidates_data <-
    read_sas(paste0(raw_data,
                    "grunddata/valgdata/",
                    "kv", year_k, "_recodes_pnr_afid.sas7bdat")) %>%
    select(c("PNR", "VALGT_JN", "VALGTYPE", "PARTI")) %>%
    mutate(VALGT_JN = VALGT_JN == "J")

  # Mean res_income per person over the window [year_k - 3, year_k], then
  # merge on the candidate data. Persons without a candidate record get NA in
  # the candidate columns, which is what run_kv is derived from.
  # NOTE(review): a PNR listed more than once in the candidate file would be
  # duplicated by this join -- confirm the file has one row per person.
  suppressWarnings({
    data_comp_year <-
      data_comp %>%
      filter(year >= (year_k - 3) & year <= year_k) %>%
      mutate(four_years = year_k) %>%
      group_by(four_years, PNR) %>%
      summarise(inc_res = mean(res_income, na.rm = TRUE)) %>%
      # summarise() only peels off the PNR level; drop the rest explicitly so
      # later steps do not run on a grouped frame
      ungroup() %>%
      left_join(candidates_data, by = "PNR") %>%
      mutate(run_kv = !is.na(VALGT_JN))
  })

  # Flag persons present in the population file of the election year.
  # Only the PNR column is read from disk. in_year is 1 for matched persons
  # and NA for everyone else.
  bef <-
    read.fst(paste0(work_data, "grunddata/", "bef", year_k, ".fst"),
             columns = "PNR") %>%
    mutate(in_year = 1)

  data_comp_year <-
    left_join(data_comp_year, bef, by = "PNR")

  local_chunks[[k]] <- data_comp_year

  # progress indicator
  print(k)
}

data_comp_local <- bind_rows(local_chunks)

# the list duplicates data_comp_local; release it
rm(local_chunks)

# Find first year elected in data
# For every PNR with at least one winning candidacy (VALGT_JN is TRUE), take
# the earliest four_years value in which that happened. Rows where VALGT_JN
# is NA (no candidate record) or FALSE are dropped by the filter.
election_sequence <-
  data_comp_local %>%
  filter(VALGT_JN) %>%
  group_by(PNR) %>%
  summarise(first_elected = min(four_years)) %>%
  ungroup()

# Attach first_elected to every row of the person; PNRs that were never
# elected get 0. The join key is spelled out so the merge cannot silently
# pick up additional shared columns.
data_comp_local <-
  left_join(data_comp_local, election_sequence, by = "PNR") %>%
  mutate(first_elected = coalesce(first_elected, 0))

# Find everyone ever running for parliament -------------------------------
# One pass per election year in `years`. Each pass yields one row per PNR
# found in data_comp during the four calendar years ending in that election
# year, with candidacy information and a population-file flag attached.
# NOTE(review): these election years are not four years apart, so the
# windows [year_k - 3, year_k] overlap for 1998/2001 and 2005/2007 and the
# same income year enters two windows -- confirm this is intended.
years <- c(1990, 1994, 1998, 2001, 2005, 2007, 2011, 2015)

# Per-election results go into a preallocated list that is bound once after
# the loop, rather than re-copying a growing tibble on every iteration.
parl_chunks <- vector("list", length(years))

for (k in seq_along(years)) {
  year_k <- years[k]

  # Candidate file for this election: keep the person id, the outcome, the
  # election type and the party. VALGT_JN becomes TRUE when it equals "J" and
  # FALSE for any other value.
  candidates_data <-
    read_sas(paste0(raw_data,
                    "grunddata/valgdata/",
                    "fv", year_k, "_recodes_pnr_afid.sas7bdat")) %>%
    select(c("PNR", "VALGT_JN", "VALGTYPE", "PARTI")) %>%
    mutate(VALGT_JN = VALGT_JN == "J")

  # Mean res_income per person over the window [year_k - 3, year_k], then
  # merge on the candidate data. Persons without a candidate record get NA in
  # the candidate columns, which is what run_fv is derived from.
  # NOTE(review): a PNR listed more than once in the candidate file would be
  # duplicated by this join -- confirm the file has one row per person.
  suppressWarnings({
    data_comp_year <-
      data_comp %>%
      filter(year >= (year_k - 3) & year <= year_k) %>%
      mutate(four_years = year_k) %>%
      group_by(four_years, PNR) %>%
      summarise(inc_res = mean(res_income, na.rm = TRUE)) %>%
      # summarise() only peels off the PNR level; drop the rest explicitly so
      # later steps do not run on a grouped frame
      ungroup() %>%
      left_join(candidates_data, by = "PNR") %>%
      mutate(run_fv = !is.na(VALGT_JN))
  })

  # Flag persons present in the population file of the election year.
  # Only the PNR column is read from disk. in_year is 1 for matched persons
  # and NA for everyone else.
  # NOTE(review): this path is relative to the working directory, whereas the
  # local-election loop reads bef files from work_data/grunddata/ -- confirm
  # which location is correct.
  bef <-
    read.fst(paste0("bef", year_k, ".fst"), columns = "PNR") %>%
    mutate(in_year = 1)

  data_comp_year <-
    left_join(data_comp_year, bef, by = "PNR")

  parl_chunks[[k]] <- data_comp_year

  # progress indicator
  print(k)
}

data_comp_parl <- bind_rows(parl_chunks)

# the list duplicates data_comp_parl; release it
rm(parl_chunks)

# Find first year elected in data
# For every PNR with at least one winning candidacy (VALGT_JN is TRUE), take
# the earliest four_years value in which that happened. Rows where VALGT_JN
# is NA (no candidate record) or FALSE are dropped by the filter.
election_sequence <-
  data_comp_parl %>%
  filter(VALGT_JN) %>%
  group_by(PNR) %>%
  summarise(first_elected = min(four_years)) %>%
  ungroup()

# Attach first_elected to every row of the person; PNRs that were never
# elected get 0. The join key is spelled out so the merge cannot silently
# pick up additional shared columns.
data_comp_parl <-
  left_join(data_comp_parl, election_sequence, by = "PNR") %>%
  mutate(first_elected = coalesce(first_elected, 0))




# Write each finished panel to its own .rdata file in the working directory.
save(data_comp_local, file = "main_data_local.rdata")
save(data_comp_parl, file = "main_data_parl.rdata")
